23 October, 2020
set.seed(15102020)
bootstraps <- 1000
data_path <- "./Data/"
Proj_name <- "BRC_growth_rate"
Browns <- RColorBrewer::brewer.pal(n = 9, "YlOrBr")[9:6]
Greens <- RColorBrewer::brewer.pal(n = 9, "YlGn")[9:6]
Blues <- RColorBrewer::brewer.pal(n = 9, "YlGnBu")[9:6]
Gradient.colours <- c(Browns[1], Greens[1], Browns[2], Greens[2], Browns[3], Greens[3], Browns[4], Greens[4], Blues)This script reproduces all sequence analysis steps and plots included in the paper plus some additional exploratory analyses.
OTUmat <- t(read.csv(paste0(data_path, "Shivta_site_otuTab2.txt"), header = TRUE, row.names = 1))
sort.order <- as.numeric(gsub("OTU([0-9]+)", "\\1", colnames( OTUmat )))
OTUmat <- OTUmat[, order(sort.order )]
Metadata <- read.csv(paste0(data_path, "Shivta_metadata.csv"), row.names = 1, header = TRUE)
read_csv(paste0(data_path, "Shivta_metadata.csv"),
trim_ws = TRUE) %>%
mutate_at(
c(
"Rock.type",
"Location"
),
~(factor(.))
) %>%
column_to_rownames("Sample.code") ->
Metadata
row.names(OTUmat) <- gsub("(.*)Nimrod[0-9]+|Osnat[0-9]+", "\\1", row.names( OTUmat))
Metadata <- Metadata[order(row.names(Metadata)), ]
OTUmat <- OTUmat[order(row.names(OTUmat)), ]
# calculate sample size
Metadata$Library.size = rowSums(OTUmat)
Metadata$Location.rock <- with(Metadata, Location:Rock.type)
# Load taxonomy data
tax.file <- "Shivta_site_silva.nrv119.taxonomy"
Taxonomy <- read.table(paste0(data_path, tax.file), stringsAsFactors = FALSE) # read taxonomy file
# count how many ';' in each cell and add up to 6
for (i in 1:nrow(Taxonomy)){
semicolons <- length(gregexpr(";", Taxonomy$V2[i] )[[1]])
if (semicolons < 6){
x <- paste0( rep("Unclassified;", 6 - semicolons ), collapse = "")
Taxonomy$V2[i] <- paste0( Taxonomy$V2[i], x, sep = "")
}
}
# split taxonomy to columns
do.call( "rbind", strsplit( Taxonomy$V1, ";", fixed = TRUE)) %>%
gsub( "size=([0-9]+)", "\\1", .) %>%
data.frame( ., do.call( "rbind", strsplit( Taxonomy$V2, ";", fixed = TRUE)), stringsAsFactors = F) %>%
apply(., 2, function(x) gsub( "\\(.*\\)", "", x)) %>%
replace(., . == "unclassified", "Unclassified") ->
Taxonomy
colnames( Taxonomy ) <- c( "OTU", "Frequency", "Domain", "Phylum", "Class", "Order", "Family", "Genus" )
# rownames(Taxonomy) <- colnames(Rock_weathering_OTUmat)
rownames(Taxonomy) <- Taxonomy[, 1]
Tree_IQ <- read_tree(paste0(data_path, "Shivta_site_otuReps.filtered.align.treefile"))
# generate phyloseq object
Ps_obj <- phyloseq(otu_table(OTUmat, taxa_are_rows = FALSE),
tax_table(Taxonomy[, -c(1, 2)]),
sample_data(Metadata),
phy_tree(Tree_IQ)
)
# Reorder factors for plotting
sample_data(Ps_obj)$Location %<>% fct_relevel("Slope", "City")Remove un- and mis-classified sequences, chloroplasts and mitochondria
domains2remove <- c("", "Archaea", "Eukaryota", "Unclassified")
classes2remove <- c("Chloroplast")
families2remove <- c("Mitochondria")
Ps_obj_filt <- subset_taxa(Ps_obj, !is.na(Phylum) &
!Domain %in% domains2remove &
!Class %in% classes2remove &
!Family %in% families2remove)Ps_obj_df <-
as.data.frame(sample_data(Ps_obj_filt)) # Put sample_data into a ggplot-friendly data.frame
Ps_obj_df <- Ps_obj_df[order(Ps_obj_df$Library.size), ]
Ps_obj_df$Index <- seq(nrow(Ps_obj_df))
ggplot(data = Ps_obj_df,
aes(x = Index, y = Library.size, color = Location.rock)) +
geom_point(size = 4) +
scale_colour_manual(values = ggpomological:::pomological_palette[c(2, 1, 9, 3)], name = "Location.rock")## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 23966 44989 53000 52663 61348 77459
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3 70 198 1852 771 76180
prevdf <- apply(X = otu_table(Ps_obj_filt),
MARGIN = ifelse(taxa_are_rows(Ps_obj_filt), yes = 1, no = 2),
FUN = function(x){sum(x > 0)})
# Add taxonomy and total read counts to this data.frame
prevdf <- data.frame(Prevalence = prevdf,
TotalAbundance = taxa_sums(Ps_obj_filt),
tax_table(Ps_obj_filt))
prevdf %>%
group_by(Phylum) %>%
summarise(`Mean prevalence` = mean(Prevalence),
`Sum prevalence` = sum(Prevalence)) ->
Prevalence_phylum_summary
Prevalence_phylum_summary %>%
kable(., digits = c(0, 1, 0)) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F)|
Phylum |
Mean prevalence |
Sum prevalence |
|---|---|---|
|
Acidobacteria |
11.7 |
152 |
|
Actinobacteria |
16.3 |
3414 |
|
Aquificae |
4.0 |
12 |
|
Armatimonadetes |
7.6 |
76 |
|
Bacteroidetes |
11.8 |
1027 |
|
Candidate_division_KB1 |
8.0 |
8 |
|
Candidate_division_OD1 |
4.0 |
4 |
|
Candidate_division_OP11 |
4.5 |
9 |
|
Candidate_division_TM7 |
7.9 |
158 |
|
Chloroflexi |
11.0 |
1124 |
|
Cyanobacteria |
14.9 |
506 |
|
Deinococcus-Thermus |
15.8 |
174 |
|
Firmicutes |
13.4 |
214 |
|
Gemmatimonadetes |
11.2 |
617 |
|
Nitrospirae |
6.0 |
12 |
|
Planctomycetes |
10.8 |
140 |
|
Proteobacteria |
14.4 |
1817 |
|
Tenericutes |
3.0 |
3 |
|
Verrucomicrobia |
23.0 |
23 |
|
WCHB1-60 |
7.0 |
28 |
prevdf %>%
group_by(Order) %>%
summarise(`Mean prevalence` = mean(Prevalence),
`Sum prevalence` = sum(Prevalence)) ->
Prevalence_Order_summary
Prevalence_Order_summary %>%
kable(., digits = c(0, 1, 0)) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F)|
Order |
Mean prevalence |
Sum prevalence |
|---|---|---|
|
AKIW781 |
9.4 |
198 |
|
AKYG1722 |
10.7 |
75 |
|
AT425-EubC11_terrestrial_group |
10.2 |
378 |
|
Acidimicrobiales |
14.7 |
368 |
|
Alteromonadales |
12.0 |
12 |
|
Aquificales |
4.0 |
12 |
|
Ardenticatenales |
8.0 |
16 |
|
B103G10 |
7.0 |
7 |
|
BD2-11_terrestrial_group |
5.0 |
5 |
|
Bacillales |
13.7 |
164 |
|
Bacteroidales |
17.0 |
17 |
|
Bdellovibrionales |
8.0 |
40 |
|
Burkholderiales |
18.5 |
204 |
|
C0119 |
5.0 |
5 |
|
Caldilineales |
18.0 |
18 |
|
Campylobacterales |
6.0 |
6 |
|
Caulobacterales |
23.7 |
71 |
|
Chromatiales |
20.0 |
20 |
|
Chthoniobacterales |
23.0 |
23 |
|
Clostridiales |
6.5 |
13 |
|
Corynebacteriales |
8.0 |
40 |
|
Cytophagales |
11.8 |
695 |
|
Deinococcales |
15.1 |
151 |
|
Desulfovibrionales |
5.0 |
5 |
|
E6aD10 |
11.0 |
11 |
|
EMP-G18 |
3.0 |
3 |
|
Enterobacteriales |
22.0 |
44 |
|
Euzebyales |
13.5 |
202 |
|
Flavobacteriales |
12.7 |
38 |
|
Frankiales |
20.9 |
356 |
|
Gaiellales |
10.4 |
94 |
|
Gemmatimonadales |
14.3 |
214 |
|
JG30-KF-CM45 |
13.0 |
468 |
|
Kineosporiales |
20.2 |
121 |
|
Lactobacillales |
18.5 |
37 |
|
Micrococcales |
18.0 |
198 |
|
Micromonosporales |
19.5 |
39 |
|
Myxococcales |
10.2 |
51 |
|
Nitriliruptorales |
9.7 |
58 |
|
Nitrosomonadales |
11.5 |
23 |
|
Nitrospirales |
6.0 |
12 |
|
Oceanospirillales |
16.0 |
16 |
|
Orbales |
7.0 |
7 |
|
Order_II |
4.8 |
24 |
|
Order_III |
8.7 |
26 |
|
PAUC43f_marine_benthic_group |
2.0 |
2 |
|
Planctomycetales |
9.2 |
74 |
|
Propionibacteriales |
14.5 |
318 |
|
Pseudomonadales |
14.7 |
103 |
|
Pseudonocardiales |
18.0 |
306 |
|
Rhizobiales |
16.1 |
386 |
|
Rhodobacterales |
14.8 |
163 |
|
Rhodospirillales |
11.8 |
271 |
|
Rickettsiales |
11.2 |
146 |
|
Rubrobacterales |
22.3 |
491 |
|
S0134_terrestrial_group |
18.0 |
18 |
|
SBYG-4553 |
10.0 |
10 |
|
Solirubrobacterales |
17.2 |
638 |
|
Sphaerobacterales |
3.3 |
10 |
|
Sphingobacteriales |
14.2 |
227 |
|
Sphingomonadales |
19.6 |
196 |
|
Streptomycetales |
24.5 |
49 |
|
Streptosporangiales |
3.0 |
3 |
|
Subgroup_3 |
15.0 |
15 |
|
Subgroup_4 |
10.6 |
85 |
|
Subgroup_6 |
13.0 |
52 |
|
SubsectionI |
13.0 |
13 |
|
SubsectionII |
15.8 |
315 |
|
SubsectionIII |
14.8 |
118 |
|
SubsectionIV |
13.5 |
54 |
|
TRA3-20 |
14.0 |
14 |
|
Thermales |
23.0 |
23 |
|
Thermophilales |
15.0 |
30 |
|
Unclassified |
9.2 |
642 |
|
Unknown_Order |
9.8 |
98 |
|
Vampirovibrionales |
6.0 |
6 |
|
WD2101_soil_group |
13.0 |
39 |
|
Xanthomonadales |
18.0 |
18 |
Based on that we will remove all phyla with a prevalence of under 8
Prevalence_phylum_summary %>%
filter(`Sum prevalence` < 8) %>%
dplyr::select(Phylum) %>%
map(as.character) %>%
unlist() ->
filterPhyla
Ps_obj_filt %<>% subset_taxa(!Phylum %in% filterPhyla)
sample_data(Ps_obj_filt)$Library.size <- rowSums(otu_table(Ps_obj_filt))
print(Ps_obj)## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 767 taxa and 25 samples ]
## sample_data() Sample Data: [ 25 samples by 6 sample variables ]
## tax_table() Taxonomy Table: [ 767 taxa by 6 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 767 tips and 765 internal nodes ]
## taxa are columns
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 709 taxa and 25 samples ]
## sample_data() Sample Data: [ 25 samples by 6 sample variables ]
## tax_table() Taxonomy Table: [ 709 taxa by 6 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 709 tips and 707 internal nodes ]
## taxa are columns
# Subset to the remaining phyla
prevdf_phylum_filt <- subset(prevdf, Phylum %in% get_taxa_unique(Ps_obj_filt, "Phylum"))
ggplot(prevdf_phylum_filt,
aes(TotalAbundance, Prevalence / nsamples(Ps_obj_filt), color = Phylum)) +
# Include a guess for parameter
geom_hline(yintercept = 0.05,
alpha = 0.5,
linetype = 2) + geom_point(size = 2, alpha = 0.7) +
scale_x_log10() + xlab("Total Abundance") + ylab("Prevalence [Frac. Samples]") +
facet_wrap( ~ Phylum) + theme(legend.position = "none")# Subset to the remaining phyla
prevdf_order_filt <- subset(prevdf, Order %in% get_taxa_unique(Ps_obj_filt, "Order"))
# grab the top 30 most abundant orders
prevdf_order_filt %>%
group_by(Order) %>%
summarise(Combined.abundance = sum(TotalAbundance)) %>%
arrange(desc(Combined.abundance)) %>%
.[1:30, "Order"] ->
Orders2plot
prevdf_order_filt2 <- subset(prevdf, Order %in% Orders2plot$Order)
ggplot(prevdf_order_filt2,
aes(TotalAbundance, Prevalence / nsamples(Ps_obj_filt), color = Order)) +
# Include a guess for parameter
geom_hline(yintercept = 0.05,
alpha = 0.5,
linetype = 2) + geom_point(size = 2, alpha = 0.7) +
scale_x_log10() + xlab("Total Abundance") + ylab("Prevalence [Frac. Samples]") +
facet_wrap( ~ Order) + theme(legend.position = "none")We will remove all sequences which appear in less than 10% of the samples
# Define prevalence threshold as 10% of total samples
prevalenceThreshold <- 0.1 * nsamples(Ps_obj_filt)
prevalenceThreshold## [1] 2.5
# Execute prevalence filter, using `prune_taxa()` function
keepTaxa <-
row.names(prevdf_phylum_filt)[(prevdf_phylum_filt$Prevalence >= prevalenceThreshold)]
Ps_obj_filt %<>% prune_taxa(keepTaxa, .)
sample_data(Ps_obj_filt)$Library.size <- rowSums(otu_table(Ps_obj_filt))
print(Ps_obj)## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 767 taxa and 25 samples ]
## sample_data() Sample Data: [ 25 samples by 6 sample variables ]
## tax_table() Taxonomy Table: [ 767 taxa by 6 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 767 tips and 765 internal nodes ]
## taxa are columns
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 694 taxa and 25 samples ]
## sample_data() Sample Data: [ 25 samples by 6 sample variables ]
## tax_table() Taxonomy Table: [ 694 taxa by 6 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 694 tips and 692 internal nodes ]
## taxa are columns
This removed 73 or 10% of the sequences.
First let’s look at the count data distribution after filtering:
sample_data(Ps_obj_filt) %>%
as_tibble() %>%
dplyr::select(Sample.name, Library.size) %>%
as(., "data.frame") %>%
kable(.) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F)|
Sample.name |
Library.size |
|---|---|
|
City Chalk 2 |
49453 |
|
City Chalk 3 |
54155 |
|
City Chalk 4 |
44943 |
|
City Chalk 5 |
58835 |
|
City Chalk 6 |
77459 |
|
City Limestone 1 |
36122 |
|
City Limestone 2 |
52816 |
|
City Limestone 3 |
48378 |
|
City Limestone 4 |
47108 |
|
City Limestone 5 |
61346 |
|
City Limestone 6 |
54505 |
|
Slope Limestone 1 |
75578 |
|
Slope Chalk 1 |
51651 |
|
Slope Chalk 2 |
45197 |
|
Slope Chalk 3 |
33625 |
|
Slope Chalk 4 |
70886 |
|
Slope Chalk 5 |
71458 |
|
Slope Chalk 6 |
58959 |
|
Slope Chalk 7 |
36756 |
|
City Chalk 1 |
23962 |
|
Slope Limestone 2 |
62456 |
|
Slope Limestone 3 |
36628 |
|
Slope Limestone 4 |
57283 |
|
Slope Limestone 5 |
64509 |
|
Slope Limestone 6 |
41564 |
The figure and table indicate only a small deviation in the number of reads per samples.
(mod1 <- adonis(
otu_table(Ps_obj_filt) ~ Library.size,
data = as(sample_data(Ps_obj_filt), "data.frame"),
method = "horn",
permutations = 9999
))##
## Call:
## adonis(formula = otu_table(Ps_obj_filt) ~ Library.size, data = as(sample_data(Ps_obj_filt), "data.frame"), permutations = 9999, method = "horn")
##
## Permutation: free
## Number of permutations: 9999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Library.size 1 0.4246 0.42456 1.3337 0.05481 0.2192
## Residuals 23 7.3218 0.31834 0.94519
## Total 24 7.7464 1.00000
notAllZero <- (rowSums(t(otu_table(Ps_obj_filt))) > 0)
vsn::meanSdPlot(as.matrix(log2(t(otu_table(Ps_obj_filt))[notAllZero, ] + 1)))The difference in library sizes is low and its effect on the community composition is minimal. We’ll use the GMPR method for library size normalisation (Chen and Chen 2017)
Ps_obj_filt_GMPR <- Ps_obj_filt
Ps_obj_filt %>%
otu_table(.) %>%
t() %>%
as(., "matrix") %>%
GMPR() ->
GMPR_factors## Begin GMPR size factor calculation ...
## Completed!
## Please watch for the samples with limited sharing with other samples based on NSS! They may be outliers!
Ps_obj_filt %>%
otu_table(.) %>%
t() %*% diag(1 / GMPR_factors$gmpr) %>%
t() %>%
as.data.frame(., row.names = sample_names(Ps_obj_filt)) %>%
otu_table(., taxa_are_rows = FALSE) ->
otu_table(Ps_obj_filt_GMPR)
sample_data(Ps_obj_filt_GMPR)$Library.size <- sample_sums(Ps_obj_filt)
adonis(
otu_table(Ps_obj_filt_GMPR) ~ Library.size,
data = as(sample_data(Ps_obj_filt_GMPR), "data.frame"),
method = "horn",
permutations = 9999
)##
## Call:
## adonis(formula = otu_table(Ps_obj_filt_GMPR) ~ Library.size, data = as(sample_data(Ps_obj_filt_GMPR), "data.frame"), permutations = 9999, method = "horn")
##
## Permutation: free
## Number of permutations: 9999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Library.size 1 0.4246 0.42456 1.3337 0.05481 0.228
## Residuals 23 7.3218 0.31834 0.94519
## Total 24 7.7464 1.00000
Did it improve anything?
notAllZero <- (rowSums(t(otu_table(Ps_obj_filt_GMPR))) > 0)
vsn::meanSdPlot(as.matrix(log2(t(otu_table(Ps_obj_filt_GMPR))[notAllZero, ] + 1)))We do that by simulating 1000 rarefaction events and calculating the metrics each time. Then, the result is averaged.
rarefaction.mat <- matrix(0, nrow = nsamples(Ps_obj_filt), ncol = bootstraps)
rownames(rarefaction.mat) <- sample_names(Ps_obj_filt)
rich.ests <- list(S.obs = rarefaction.mat, S.chao1 = rarefaction.mat, se.chao1 = rarefaction.mat,
S.ACE = rarefaction.mat, se.ACE = rarefaction.mat)
for (i in seq(bootstraps)) {
sub.OTUmat <- rrarefy(otu_table(Ps_obj_filt), min(rowSums(otu_table(Ps_obj_filt))))
for (j in seq(length(rich.ests))) {
rich.ests[[j]][, i] <- t(estimateR(sub.OTUmat))[, j]
}
}
Richness <- data.frame(row.names = row.names(rich.ests[[1]]))
for (i in c(1, seq(2, length(rich.ests), 2))) {
S <- apply(rich.ests[[i]], 1, mean)
if (i == 1) {
se <- apply(rich.ests[[i]], 1, function(x) (mean(x)/sqrt(length(x))))
} else se <- apply(rich.ests[[i + 1]], 1, mean)
Richness <- cbind(Richness, S, se)
}
colnames(Richness) <- c("S.obs", "S.obs.se", "S.chao1", "S.chao1.se", "S.ACE", "S.ACE.se")
saveRDS(Richness, file = paste0("./Results/", Proj_name, "_richness.RDS"))
write.csv(Richness, file = paste0("./Results/", Proj_name, "_richness.csv"))
ses <- grep("\\.se", colnames(Richness))
Richness[, ses] %>%
gather(key = "est.se") -> se.dat
Richness[, -unique(ses)] %>%
gather(key = "est") -> mean.dat
n <- length(unique(mean.dat$est))
# diversity indices
diversity.inds <- list(Shannon = rarefaction.mat, inv.simpson = rarefaction.mat, BP = rarefaction.mat)
for (i in seq(bootstraps)) {
sub.OTUmat <- rrarefy(otu_table(Ps_obj_filt), min(rowSums(otu_table(Ps_obj_filt))))
diversity.inds$Shannon[, i] <- diversityresult(sub.OTUmat, index = 'Shannon', method = 'each site', digits = 3)[, 1]
diversity.inds$inv.simpson[, i] <- diversityresult(sub.OTUmat, index = 'inverseSimpson', method = 'each site', digits = 3)[, 1]
diversity.inds$BP[, i] <- diversityresult(sub.OTUmat, index = 'Berger', method = 'each site', digits = 3)[, 1]
}
Diversity <- data.frame(row.names = row.names(diversity.inds[[1]]))
for (i in seq(length(diversity.inds))) {
S <- apply(diversity.inds[[i]], 1, mean)
se <- apply(diversity.inds[[i]], 1, function(x) (mean(x)/sqrt(length(x))))
Diversity <- cbind(Diversity, S, se)
}
colnames(Diversity) <- c("Shannon", "Shannon.se", "Inv.simpson", "Inv.simpson.se", "BP", "BP.se")
ses <- grep("\\.se", colnames(Diversity))
Diversity[, ses] %>% gather(key = "est.se") -> se.dat
Diversity[, -unique(ses)] %>% gather(key = "est") -> mean.dat
saveRDS(Diversity, file = paste0("./Results/", Proj_name, "_diversity.RDS"))
write.csv(Diversity, file = paste0("./Results/", Proj_name, "_diversity.csv"))# make combined richness diversity
Richness_Diversity <- cbind(Richness, Diversity)
ses <- grep("\\.se", colnames(Richness_Diversity))
Richness_Diversity[, ses] %>%
gather(key = "est.se") ->
se.dat
Richness_Diversity[, -unique(ses)] %>%
gather(key = "Metric",
value = "Estimate") ->
mean.dat
Richness_Diversity_long <-
cbind(
Sample = rep(rownames(Richness_Diversity), times = length(unique(mean.dat$Metric))),
mean.dat,
lerr = mean.dat$Estimate - se.dat$value,
herr = mean.dat$Estimate + se.dat$value
)
Richness_Diversity_long$Metric <-
factor(
Richness_Diversity_long$Metric,
levels = c("S.obs", "S.chao1", "S.ACE", "Shannon", "Inv.simpson", "BP"),
labels = c("S obs.", "Chao1", "ACE", "Shannon", "Inv. Simpson" , "Berger Parker")
)
Richness_Diversity_long %<>%
cbind(.,
sample_data(Ps_obj_filt))
# S Obs
data2test <- Richness_Diversity_long[Richness_Diversity_long$Metric == "S obs.", ]
(mod_obsS <- TestAlphaV3(filter(Richness_Diversity_long, Metric == "S obs.")))## Call:
## aov(formula = as.formula(paste(response_name, paste(factor_names[1],
## factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
##
## Terms:
## Location Rock.type Location:Rock.type Residuals
## Sum of Squares 68 647 10123 62200
## Deg. of Freedom 1 1 1 21
##
## Residual standard error: 54.4
## Estimated effects may be unbalanced
## [1] "Unequal group sizes - showing SS type III"
## Anova Table (Type III tests)
##
## Response: Estimate
## Sum Sq Df F value Pr(>F)
## (Intercept) 588166 1 198.58 3.6e-12 ***
## Location 3993 1 1.35 0.259
## Rock.type 7767 1 2.62 0.120
## Location:Rock.type 10123 1 3.42 0.079 .
## Residuals 62200 21
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tables of means
## Grand mean
##
## 311
##
## Location
## Slope City
## 312 309
## rep 13 12
##
## Rock.type
## Chalk Limestone
## 306 316
## rep 13 12
##
## Location:Rock.type
## Rock.type
## Location Chalk Limestone
## Slope 290 339
## rep 7 6
## City 325 293
## rep 6 6
## Call:
## aov(formula = as.formula(paste(response_name, paste(factor_names[1],
## factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
##
## Terms:
## Location Rock.type Location:Rock.type Residuals
## Sum of Squares 68 647 10123 62200
## Deg. of Freedom 1 1 1 21
##
## Residual standard error: 54.4
## Estimated effects may be unbalanced
|
Location |
Rock.type |
emmean |
SE |
df |
lower.CL |
upper.CL |
|---|---|---|---|---|---|---|
|
Slope |
Chalk |
290 |
20.6 |
21 |
247 |
333 |
|
City |
Chalk |
325 |
22.2 |
21 |
279 |
371 |
|
Slope |
Limestone |
339 |
22.2 |
21 |
293 |
385 |
|
City |
Limestone |
293 |
22.2 |
21 |
247 |
340 |
## contrast estimate SE df t.ratio p.value
## Slope Chalk - City Chalk -35.2 30.3 21 -1.161 0.6570
## Slope Chalk - Slope Limestone -49.0 30.3 21 -1.619 0.3900
## Slope Chalk - City Limestone -3.5 30.3 21 -0.116 0.9990
## City Chalk - Slope Limestone -13.9 31.4 21 -0.442 0.9700
## City Chalk - City Limestone 31.6 31.4 21 1.007 0.7470
## Slope Limestone - City Limestone 45.5 31.4 21 1.449 0.4850
##
## P value adjustment: tukey method for comparing a family of 4 estimates
(obsS_pairwise <- cld(marginal,
alpha = 0.05,
Letters = letters,
adjust = "tukey")) # works with lm but not with two-factor ART|
Location |
Rock.type |
emmean |
SE |
df |
lower.CL |
upper.CL |
.group |
|
|---|---|---|---|---|---|---|---|---|
|
1 |
Slope |
Chalk |
290 |
20.6 |
21 |
234 |
346 |
a |
|
4 |
City |
Limestone |
293 |
22.2 |
21 |
233 |
354 |
a |
|
2 |
City |
Chalk |
325 |
22.2 |
21 |
265 |
386 |
a |
|
3 |
Slope |
Limestone |
339 |
22.2 |
21 |
278 |
399 |
a |
|
Df |
Sum Sq |
Mean Sq |
F value |
Pr(>F) |
Part Eta Sq |
|---|---|---|---|---|---|
|
1 |
67.7 |
67.7 |
0.023 |
0.881 |
0.001 |
|
1 |
647.2 |
647.2 |
0.218 |
0.645 |
0.009 |
|
1 |
10123.1 |
10123.1 |
3.418 |
0.079 |
0.139 |
|
21 |
62200.5 |
2961.9 |
NA |
NA |
0.852 |
# pwpp(marginal) # Pairwise P-value plot. Fails for unbalanced design
emmip(mod_obsS, Location ~ Rock.type)# summary(as.glht(pairs(marginal))) # fails because of unbalanced design
# Shannon
(mod_Shannon <- TestAlphaV3(filter(Richness_Diversity_long, Metric == "Shannon")))## Call:
## aov(formula = as.formula(paste(response_name, paste(factor_names[1],
## factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
##
## Terms:
## Location Rock.type Location:Rock.type Residuals
## Sum of Squares 1.33 0.09 0.26 4.79
## Deg. of Freedom 1 1 1 21
##
## Residual standard error: 0.478
## Estimated effects may be unbalanced
## [1] "Unequal group sizes - showing SS type III"
## Anova Table (Type III tests)
##
## Response: Estimate
## Sum Sq Df F value Pr(>F)
## (Intercept) 308.9 1 1353.08 <2e-16 ***
## Location 1.4 1 6.10 0.022 *
## Rock.type 0.1 1 0.36 0.554
## Location:Rock.type 0.3 1 1.15 0.295
## Residuals 4.8 21
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tables of means
## Grand mean
##
## 3.53
##
## Location
## Slope City
## 3.75 3.29
## rep 13.00 12.00
##
## Rock.type
## Chalk Limestone
## 3.47 3.59
## rep 13.00 12.00
##
## Location:Rock.type
## Rock.type
## Location Chalk Limestone
## Slope 3.60 3.92
## rep 7.00 6.00
## City 3.33 3.24
## rep 6.00 6.00
## Call:
## aov(formula = as.formula(paste(response_name, paste(factor_names[1],
## factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
##
## Terms:
## Location Rock.type Location:Rock.type Residuals
## Sum of Squares 1.33 0.09 0.26 4.79
## Deg. of Freedom 1 1 1 21
##
## Residual standard error: 0.478
## Estimated effects may be unbalanced
|
Location |
Rock.type |
emmean |
SE |
df |
lower.CL |
upper.CL |
|---|---|---|---|---|---|---|
|
Slope |
Chalk |
3.60 |
0.181 |
21 |
3.22 |
3.98 |
|
City |
Chalk |
3.33 |
0.195 |
21 |
2.93 |
3.74 |
|
Slope |
Limestone |
3.92 |
0.195 |
21 |
3.51 |
4.33 |
|
City |
Limestone |
3.24 |
0.195 |
21 |
2.84 |
3.65 |
## contrast estimate SE df t.ratio p.value
## Slope Chalk - City Chalk 0.268 0.266 21 1.007 0.7470
## Slope Chalk - Slope Limestone -0.321 0.266 21 -1.206 0.6300
## Slope Chalk - City Limestone 0.358 0.266 21 1.347 0.5450
## City Chalk - Slope Limestone -0.588 0.276 21 -2.133 0.1750
## City Chalk - City Limestone 0.090 0.276 21 0.328 0.9880
## Slope Limestone - City Limestone 0.679 0.276 21 2.460 0.0960
##
## P value adjustment: tukey method for comparing a family of 4 estimates
(Shannon_pairwise <- cld(marginal,
alpha = 0.05,
Letters = letters,
adjust = "tukey")) # works with lm but not with two-factor ART|
Location |
Rock.type |
emmean |
SE |
df |
lower.CL |
upper.CL |
.group |
|
|---|---|---|---|---|---|---|---|---|
|
4 |
City |
Limestone |
3.24 |
0.195 |
21 |
2.71 |
3.77 |
a |
|
2 |
City |
Chalk |
3.33 |
0.195 |
21 |
2.80 |
3.86 |
a |
|
1 |
Slope |
Chalk |
3.60 |
0.181 |
21 |
3.11 |
4.09 |
a |
|
3 |
Slope |
Limestone |
3.92 |
0.195 |
21 |
3.39 |
4.45 |
a |
|
Df |
Sum Sq |
Mean Sq |
F value |
Pr(>F) |
Part Eta Sq |
|---|---|---|---|---|---|
|
1 |
1.325 |
1.325 |
5.804 |
0.025 |
0.205 |
|
1 |
0.094 |
0.094 |
0.412 |
0.528 |
0.015 |
|
1 |
0.263 |
0.263 |
1.151 |
0.295 |
0.041 |
|
21 |
4.795 |
0.228 |
NA |
NA |
0.740 |
# pwpp(marginal) # Pairwise P-value plot. Fails for unbalanced design
emmip(mod_Shannon, Location ~ Rock.type)## Call:
## aov(formula = as.formula(paste(response_name, paste(factor_names[1],
## factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
##
## Terms:
## Location Rock.type Location:Rock.type Residuals
## Sum of Squares 10 1969 8756 68810
## Deg. of Freedom 1 1 1 21
##
## Residual standard error: 57.2
## Estimated effects may be unbalanced
## [1] "Unequal group sizes - showing SS type III"
## Anova Table (Type III tests)
##
## Response: Estimate
## Sum Sq Df F value Pr(>F)
## (Intercept) 4281315 1 1306.61 <2e-16 ***
## Location 4 1 0.00 0.97
## Rock.type 1671 1 0.51 0.48
## Location:Rock.type 8756 1 2.67 0.12
## Residuals 68810 21
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tables of means
## Grand mean
##
## 414
##
## Location
## Slope City
## 413 414
## rep 13 12
##
## Rock.type
## Chalk Limestone
## 405 423
## rep 13 12
##
## Location:Rock.type
## Rock.type
## Location Chalk Limestone
## Slope 388 442
## rep 7 6
## City 425 404
## rep 6 6
## Call:
## aov(formula = as.formula(paste(response_name, paste(factor_names[1],
## factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
##
## Terms:
## Location Rock.type Location:Rock.type Residuals
## Sum of Squares 10 1969 8756 68810
## Deg. of Freedom 1 1 1 21
##
## Residual standard error: 57.2
## Estimated effects may be unbalanced
|
Location |
Rock.type |
emmean |
SE |
df |
lower.CL |
upper.CL |
|---|---|---|---|---|---|---|
|
Slope |
Chalk |
388 |
21.6 |
21 |
343 |
433 |
|
City |
Chalk |
425 |
23.4 |
21 |
376 |
474 |
|
Slope |
Limestone |
442 |
23.4 |
21 |
394 |
491 |
|
City |
Limestone |
404 |
23.4 |
21 |
355 |
452 |
## contrast estimate SE df t.ratio p.value
## Slope Chalk - City Chalk -36.7 31.8 21 -1.153 0.6620
## Slope Chalk - Slope Limestone -53.9 31.8 21 -1.693 0.3520
## Slope Chalk - City Limestone -15.6 31.8 21 -0.490 0.9610
## City Chalk - Slope Limestone -17.2 33.0 21 -0.520 0.9530
## City Chalk - City Limestone 21.1 33.0 21 0.639 0.9180
## Slope Limestone - City Limestone 38.3 33.0 21 1.159 0.6580
##
## P value adjustment: tukey method for comparing a family of 4 estimates
(ACE_pairwise <- cld(marginal,
alpha = 0.05,
Letters = letters,
adjust = "tukey")) # works with lm but not with two-factor ART|
Location |
Rock.type |
emmean |
SE |
df |
lower.CL |
upper.CL |
.group |
|
|---|---|---|---|---|---|---|---|---|
|
1 |
Slope |
Chalk |
388 |
21.6 |
21 |
329 |
447 |
a |
|
4 |
City |
Limestone |
404 |
23.4 |
21 |
340 |
467 |
a |
|
2 |
City |
Chalk |
425 |
23.4 |
21 |
361 |
489 |
a |
|
3 |
Slope |
Limestone |
442 |
23.4 |
21 |
378 |
506 |
a |
|
Df |
Sum Sq |
Mean Sq |
F value |
Pr(>F) |
Part Eta Sq |
|---|---|---|---|---|---|
|
1 |
10.1 |
10.1 |
0.003 |
0.956 |
0.000 |
|
1 |
1969.3 |
1969.3 |
0.601 |
0.447 |
0.025 |
|
1 |
8755.5 |
8755.5 |
2.672 |
0.117 |
0.110 |
|
21 |
68809.7 |
3276.7 |
NA |
NA |
0.865 |
# pwpp(marginal) # Pairwise P-value plot. Fails for unbalanced design
emmip(mod_ACE, Location ~ Rock.type)# summary(as.glht(pairs(marginal))) # fails because of unbalanced design
#Inv. Simpson
(mod_InvSim <- TestAlphaV3(filter(Richness_Diversity_long, Metric == "Inv. Simpson")))## Call:
## aov(formula = as.formula(paste(response_name, paste(factor_names[1],
## factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
##
## Terms:
## Location Rock.type Location:Rock.type Residuals
## Sum of Squares 508 107 117 1060
## Deg. of Freedom 1 1 1 21
##
## Residual standard error: 7.1
## Estimated effects may be unbalanced
## [1] "Unequal group sizes - showing SS type III"
## Anova Table (Type III tests)
##
## Response: Estimate
## Sum Sq Df F value Pr(>F)
## (Intercept) 7346 1 145.56 6.6e-11 ***
## Location 543 1 10.76 0.0036 **
## Rock.type 99 1 1.96 0.1757
## Location:Rock.type 117 1 2.32 0.1427
## Residuals 1060 21
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tables of means
## Grand mean
##
## 17.2
##
## Location
## Slope City
## 21.5 12.5
## rep 13.0 12.0
##
## Rock.type
## Chalk Limestone
## 15.2 19.4
## rep 13.0 12.0
##
## Location:Rock.type
## Rock.type
## Location Chalk Limestone
## Slope 17.7 26.0
## rep 7.0 6.0
## City 12.7 12.3
## rep 6.0 6.0
## Call:
## aov(formula = as.formula(paste(response_name, paste(factor_names[1],
## factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
##
## Terms:
## Location Rock.type Location:Rock.type Residuals
## Sum of Squares 508 107 117 1060
## Deg. of Freedom 1 1 1 21
##
## Residual standard error: 7.1
## Estimated effects may be unbalanced
|
Location |
Rock.type |
emmean |
SE |
df |
lower.CL |
upper.CL |
|---|---|---|---|---|---|---|
|
Slope |
Chalk |
17.7 |
2.69 |
21 |
12.10 |
23.3 |
|
City |
Chalk |
12.7 |
2.90 |
21 |
6.65 |
18.7 |
|
Slope |
Limestone |
26.0 |
2.90 |
21 |
19.98 |
32.0 |
|
City |
Limestone |
12.3 |
2.90 |
21 |
6.31 |
18.4 |
## contrast estimate SE df t.ratio p.value
## Slope Chalk - City Chalk 5.00 3.95 21 1.270 0.5940
## Slope Chalk - Slope Limestone -8.33 3.95 21 -2.110 0.1830
## Slope Chalk - City Limestone 5.35 3.95 21 1.350 0.5410
## City Chalk - Slope Limestone -13.33 4.10 21 -3.250 0.0190
## City Chalk - City Limestone 0.35 4.10 21 0.080 1.0000
## Slope Limestone - City Limestone 13.68 4.10 21 3.330 0.0150
##
## P value adjustment: tukey method for comparing a family of 4 estimates
(InvSim_pairwise <- cld(marginal,
alpha = 0.05,
Letters = letters,
adjust = "tukey")) # works with lm but not with two-factor ART|
Location |
Rock.type |
emmean |
SE |
df |
lower.CL |
upper.CL |
.group |
|
|---|---|---|---|---|---|---|---|---|
|
4 |
City |
Limestone |
12.3 |
2.90 |
21 |
4.44 |
20.2 |
a |
|
2 |
City |
Chalk |
12.7 |
2.90 |
21 |
4.79 |
20.6 |
a |
|
1 |
Slope |
Chalk |
17.7 |
2.69 |
21 |
10.37 |
25.0 |
ab |
|
3 |
Slope |
Limestone |
26.0 |
2.90 |
21 |
18.12 |
33.9 |
b |
|
Df |
Sum Sq |
Mean Sq |
F value |
Pr(>F) |
Part Eta Sq |
|---|---|---|---|---|---|
|
1 |
508 |
507.7 |
10.06 |
0.005 |
0.283 |
|
1 |
107 |
107.4 |
2.13 |
0.159 |
0.060 |
|
1 |
117 |
117.0 |
2.32 |
0.143 |
0.065 |
|
21 |
1060 |
50.5 |
NA |
NA |
0.591 |
# pwpp(marginal) # Pairwise P-value plot. Fails for unbalanced design
emmip(mod_InvSim, Location ~ Rock.type)# summary(as.glht(pairs(marginal))) # fails because of unbalanced design
#Berger Parker
(mod_BP <- TestAlphaV3(filter(Richness_Diversity_long, Metric == "Berger Parker")))## Call:
## aov(formula = as.formula(paste(response_name, paste(factor_names[1],
## factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
##
## Terms:
## Location Rock.type Location:Rock.type Residuals
## Sum of Squares 0.0718 0.0103 0.0002 0.1120
## Deg. of Freedom 1 1 1 21
##
## Residual standard error: 0.073
## Estimated effects may be unbalanced
## [1] "Unequal group sizes - showing SS type III"
## Anova Table (Type III tests)
##
## Response: Estimate
## Sum Sq Df F value Pr(>F)
## (Intercept) 0.952 1 178.41 9.9e-12 ***
## Location 0.074 1 13.87 0.0013 **
## Rock.type 0.010 1 1.91 0.1817
## Location:Rock.type 0.000 1 0.04 0.8508
## Residuals 0.112 21
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tables of means
## Grand mean
##
## 0.194
##
## Location
## Slope City
## 0.143 0.25
## rep 13.000 12.00
##
## Rock.type
## Chalk Limestone
## 0.214 0.173
## rep 13.000 12.000
##
## Location:Rock.type
## Rock.type
## Location Chalk Limestone
## Slope 0.16 0.12
## rep 7.00 6.00
## City 0.27 0.23
## rep 6.00 6.00
## Call:
## aov(formula = as.formula(paste(response_name, paste(factor_names[1],
## factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
##
## Terms:
## Location Rock.type Location:Rock.type Residuals
## Sum of Squares 0.0718 0.0103 0.0002 0.1120
## Deg. of Freedom 1 1 1 21
##
## Residual standard error: 0.073
## Estimated effects may be unbalanced
|
Location |
Rock.type |
emmean |
SE |
df |
lower.CL |
upper.CL |
|---|---|---|---|---|---|---|
|
Slope |
Chalk |
0.164 |
0.028 |
21 |
0.107 |
0.221 |
|
City |
Chalk |
0.268 |
0.030 |
21 |
0.206 |
0.330 |
|
Slope |
Limestone |
0.118 |
0.030 |
21 |
0.056 |
0.180 |
|
City |
Limestone |
0.233 |
0.030 |
21 |
0.171 |
0.295 |
## contrast estimate SE df t.ratio p.value
## Slope Chalk - City Chalk -0.1035 0.0406 21 -2.550 0.0810
## Slope Chalk - Slope Limestone 0.0460 0.0406 21 1.130 0.6740
## Slope Chalk - City Limestone -0.0686 0.0406 21 -1.690 0.3550
## City Chalk - Slope Limestone 0.1495 0.0422 21 3.540 0.0100
## City Chalk - City Limestone 0.0349 0.0422 21 0.830 0.8410
## Slope Limestone - City Limestone -0.1146 0.0422 21 -2.720 0.0580
##
## P value adjustment: tukey method for comparing a family of 4 estimates
(BP_pairwise <- cld(marginal,
alpha = 0.05,
Letters = letters,
adjust = "tukey")) # works with lm but not with two-factor ART|
Location |
Rock.type |
emmean |
SE |
df |
lower.CL |
upper.CL |
.group |
|
|---|---|---|---|---|---|---|---|---|
|
3 |
Slope |
Limestone |
0.118 |
0.030 |
21 |
0.037 |
0.199 |
a |
|
1 |
Slope |
Chalk |
0.164 |
0.028 |
21 |
0.089 |
0.239 |
ab |
|
4 |
City |
Limestone |
0.233 |
0.030 |
21 |
0.151 |
0.314 |
ab |
|
2 |
City |
Chalk |
0.268 |
0.030 |
21 |
0.186 |
0.349 |
b |
|
Df |
Sum Sq |
Mean Sq |
F value |
Pr(>F) |
Part Eta Sq |
|---|---|---|---|---|---|
|
1 |
0.072 |
0.072 |
13.459 |
0.001 |
0.369 |
|
1 |
0.010 |
0.010 |
1.930 |
0.179 |
0.053 |
|
1 |
0.000 |
0.000 |
0.036 |
0.851 |
0.001 |
|
21 |
0.112 |
0.005 |
NA |
NA |
0.577 |
# pwpp(marginal) # Pairwise P-value plot. Fails for unbalanced design
emmip(mod_BP, Location ~ Rock.type)Richness_Diversity_long %>%
dplyr::filter(!Metric %in% c("Chao1", "ACE")) %>%
mutate_at(., "Metric", ~fct_recode(., "Observed S" = "S obs.", "Inv. Simpson" = "Inv. Simpson", "Berger Parker" = "Berger Parker")) %>%
mutate_at(., "Metric", ~fct_relevel(., "Observed S", "Inv. Simpson", "Shannon", "Berger Parker")) %>%
droplevels() ->
Richness_Diversity_long2plot
p_alpha <- ggplot() +
geom_violin(data = Richness_Diversity_long2plot,
aes(
x = Location,
y = Estimate,
ymin = lerr,
ymax = herr
), colour = "grey",
fill = "grey",
alpha = 1 / 3) +
geom_jitter2(data = Richness_Diversity_long2plot,
aes(x = Location,
y = Estimate,
ymin = lerr,
ymax = herr,
colour = Location), size = 3, width = 0.2, alpha = 2/3) +
scale_colour_manual(values = Gradient.colours[c(5, 6, 11)], name = "") +
# geom_errorbar(alpha = 1 / 2, width = 0.3) +
xlab("") +
ylab("") +
theme(axis.text.x = element_text(
angle = 45,
vjust = 0.9,
hjust = 1
),
legend.position="none") +
facet_grid(Metric ~ Rock.type, scale = "free") +
background_grid(major = "y",
minor = "none",
size.major = 0.8)
dat_text <- data.frame(
label = str_remove_all(c(obsS_pairwise$.group[1:4],
Shannon_pairwise$.group[1:4],
InvSim_pairwise$.group[1:4],
BP_pairwise$.group[1:4]),
pattern = " "),
Metric = fct_inorder(rep(levels(Richness_Diversity_long2plot$Metric), each = 4)),
Rock.type = fct_c(obsS_pairwise$Rock.type[1:4],
Shannon_pairwise$Rock.type[1:4],
InvSim_pairwise$Rock.type[1:4],
BP_pairwise$Rock.type[1:4]),
x = fct_c(obsS_pairwise$Location[1:4],
Shannon_pairwise$Location[1:4],
InvSim_pairwise$Location[1:4],
BP_pairwise$Location[1:4]),
# x = as.factor(levels(Richness_Diversity_long2plot$Climate.Source)),
y = rep(c(520, 45, 5.2, 0.52), each = 4)
# y = rep(c(40, 140, 0.5), each = 6)
)
p_alpha <- p_alpha + geom_text(
data = dat_text,
mapping = aes(x = x, y = y, label = label),
nudge_x = 0,
nudge_y = 0
)
print(p_alpha)Calculate and plot beta diversity metrics.
(mod1 <- adonis(
otu_table(Ps_obj_filt_GMPR) ~ Location * Rock.type,
data = as(sample_data(Ps_obj_filt_GMPR), "data.frame"),
method = "horn",
permutations = 9999
))##
## Call:
## adonis(formula = otu_table(Ps_obj_filt_GMPR) ~ Location * Rock.type, data = as(sample_data(Ps_obj_filt_GMPR), "data.frame"), permutations = 9999, method = "horn")
##
## Permutation: free
## Number of permutations: 9999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Location 1 0.63 0.629 2.12 0.081 0.024 *
## Rock.type 1 0.39 0.386 1.30 0.050 0.231
## Location:Rock.type 1 0.49 0.486 1.63 0.063 0.101
## Residuals 21 6.25 0.297 0.806
## Total 24 7.75 1.000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(mod2 <- adonis(
otu_table(Ps_obj_filt_GMPR) ~ Location,
data = as(sample_data(Ps_obj_filt_GMPR), "data.frame"),
method = "horn",
permutations = 9999
))##
## Call:
## adonis(formula = otu_table(Ps_obj_filt_GMPR) ~ Location, data = as(sample_data(Ps_obj_filt_GMPR), "data.frame"), permutations = 9999, method = "horn")
##
## Permutation: free
## Number of permutations: 9999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Location 1 0.63 0.629 2.03 0.081 0.035 *
## Residuals 23 7.12 0.309 0.919
## Total 24 7.75 1.000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod1_pairwise <- PairwiseAdonis(
otu_table(Ps_obj_filt_GMPR),
sample_data(Ps_obj_filt_GMPR)$Location,
sim.function = "vegdist",
sim.method = "horn",
p.adjust.m = "BH"
)
print(mod1_pairwise)## pairs total.DF F.Model R2 p.value p.adjusted sig
## 1 City - Slope 24 2.03 0.0812 0.038 0.038 .
## [[1]]
## [1] "City - Slope"
mod2_pairwise <- PairwiseAdonis(
otu_table(Ps_obj_filt_GMPR),
sample_data(Ps_obj_filt_GMPR)$Rock.type,
sim.function = "vegdist",
sim.method = "horn",
p.adjust.m = "BH"
)
print(mod2_pairwise)## pairs total.DF F.Model R2 p.value p.adjusted sig
## 1 Chalk - Limestone 24 1.21 0.0501 0.281 0.281
## [[1]]
## character(0)
mod3_pairwise <- PairwiseAdonis(
otu_table(Ps_obj_filt_GMPR),
sample_data(Ps_obj_filt_GMPR)$Location.rock,
sim.function = "vegdist",
sim.method = "horn",
p.adjust.m = "BH"
)
print(mod3_pairwise)## pairs total.DF F.Model R2 p.value p.adjusted sig
## 1 City:Chalk - City:Limestone 11 1.20 0.1069 0.329 0.395
## 2 City:Chalk - Slope:Limestone 11 1.43 0.1251 0.182 0.273
## 3 City:Chalk - Slope:Chalk 12 2.92 0.2099 0.012 0.072
## 4 City:Limestone - Slope:Limestone 11 1.07 0.0967 0.424 0.424
## 5 City:Limestone - Slope:Chalk 12 1.99 0.1531 0.089 0.178
## 6 Slope:Limestone - Slope:Chalk 12 1.90 0.1471 0.074 0.178
## [[1]]
## [1] "City:Chalk - Slope:Chalk"
Unifrac_mat <- UniFrac(Ps_obj_filt,
weighted = TRUE,
normalized = TRUE,
parallel = TRUE,
fast = TRUE)
(mod1 <- adonis(
Unifrac_mat ~ Location * Rock.type,
data = as(sample_data(Ps_obj_filt_GMPR), "data.frame"),
method = "horn",
permutations = 9999
))##
## Call:
## adonis(formula = Unifrac_mat ~ Location * Rock.type, data = as(sample_data(Ps_obj_filt_GMPR), "data.frame"), permutations = 9999, method = "horn")
##
## Permutation: free
## Number of permutations: 9999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Location 1 0.044 0.0443 1.24 0.049 0.268
## Rock.type 1 0.050 0.0496 1.39 0.055 0.193
## Location:Rock.type 1 0.060 0.0601 1.68 0.066 0.097 .
## Residuals 21 0.751 0.0357 0.830
## Total 24 0.905 1.000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(mod2 <- adonis(
Unifrac_mat ~ Location,
data = as(sample_data(Ps_obj_filt_GMPR), "data.frame"),
method = "horn",
permutations = 9999
))##
## Call:
## adonis(formula = Unifrac_mat ~ Location, data = as(sample_data(Ps_obj_filt_GMPR), "data.frame"), permutations = 9999, method = "horn")
##
## Permutation: free
## Number of permutations: 9999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Location 1 0.044 0.0443 1.19 0.049 0.28
## Residuals 23 0.860 0.0374 0.951
## Total 24 0.905 1.000
The difference between city and slope is significant based on the Morisita-Horn distances between OTUs but not based on UniFrac.
Ps_obj_ord1 <- ordinate(Ps_obj_filt_GMPR, "CAP", "horn", formula = Ps_obj_filt_GMPR ~ Location * Rock.type)
Ps_obj_ord2 <- ordinate(Ps_obj_filt_GMPR, "CAP", "horn", formula = Ps_obj_filt_GMPR ~ Location)
explained <- eigenvals(Ps_obj_ord2)/sum( eigenvals(Ps_obj_ord2)) * 100
explained <- as.numeric(format(round(explained, 1), nsmall = 1))
Ps_obj_filt_GMPR %>%
plot_ordination(., Ps_obj_ord2, type = "samples", shape = "Rock.type", color = "Location", justDF = TRUE) ->
ord_df
p_ord <- ggplot(ord_df,
aes(
x = CAP1,
y = MDS1,
shape = Rock.type,
color = Location
)) +
stat_ellipse(
aes(x = CAP1,
y = MDS1,
fill = Location
),
geom = "polygon",
alpha = 1/4,
type = "t",
level = 0.9,
# linetype = 2,
inherit.aes = FALSE
) +
geom_point(size = 4, alpha = 2 / 3) +
guides(colour = guide_legend(title = "Location"), shape = guide_legend(title = "Rock.type")) +
scale_colour_manual(values = Gradient.colours) +
scale_fill_manual(values = Gradient.colours, guide = "none") +
labs(x = sprintf("CAP1 (%s%%)", explained[1]),
y = sprintf("CAP2 (%s%%)", explained[2])) +
coord_fixed(ratio = sqrt(explained[2] / explained[1])) +
theme(legend.justification = "top")
# facet_wrap(. ~ Rock.type)
print(p_ord)Ps_obj_ord1 <- ordinate(Ps_obj_filt, "PCoA", "Unifrac", formula = Ps_obj_filt ~ Location * Rock.type)
Ps_obj_ord2 <- ordinate(Ps_obj_filt, "PCoA", "Unifrac", formula = Ps_obj_filt ~ Location)
explained <- Ps_obj_ord2$values$Relative_eig/sum(Ps_obj_ord2$values$Relative_eig) * 100
explained <- as.numeric(format(round(explained, 1), nsmall = 1))
Ps_obj_filt %>%
plot_ordination(., Ps_obj_ord2, type = "samples", shape = "Rock.type", color = "Location", justDF = TRUE) ->
ord_df
p_ord_phylo <- ggplot(ord_df,
aes(
x = Axis.1,
y = Axis.2,
shape = Rock.type,
color = Location
)) +
stat_ellipse(
aes(x = Axis.1,
y = Axis.2,
fill = Location
),
geom = "polygon",
alpha = 1/4,
type = "t",
level = 0.9,
# linetype = 2,
inherit.aes = FALSE
) +
geom_point(size = 4, alpha = 2 / 3) +
theme_bw(base_size = 14) +
guides(colour = guide_legend(title = "Location"), shape = guide_legend(title = "Rock.type")) +
scale_colour_manual(values = Gradient.colours) +
scale_fill_manual(values = Gradient.colours, guide = "none") +
labs(x = sprintf("CAP1 (%s%%)", explained[1]),
y = sprintf("CAP2 (%s%%)", explained[2])) +
coord_fixed(ratio = sqrt(explained[2] / explained[1])) #+
# facet_wrap(. ~ Rock.type)
print(p_ord_phylo)STAMPR analysis of the differences of each phylum between locations using Aligned Rank Transformed ANOVA test and a post-hoc estimated marginal means.
Taxa_tests_phylum1 <- STAMPR2(Ps_obj_filt, vars2test = "Location", threshold = 0.05, outputfile = paste0(Proj_name, "_Location"))
pSTAMPR1 <- plotSTAMPR(Taxa_tests_phylum1, pair = "Slope - City")
print(pSTAMPR1)Taxa_tests_phylum2 <- STAMPR2(Ps_obj_filt, vars2test = c("Location", "Rock.type"), threshold = 0.05, outputfile = paste0(Proj_name, "_Location_Rock"))
pSTAMPR2 <- plotSTAMPR(Taxa_tests_phylum2, pair = "Slope:Chalk - City:Chalk")
print(pSTAMPR2)Agglomerate data and tag rare taxa
Ps_obj_filt_GMPR %>%
transform_sample_counts(., function(x){x / sum(x)} * 100) %>%
tax_glom(.,
"Phylum",
NArm = TRUE) ->
Ps_obj_filt_GMPR_glom
Ps_obj_filt_GMPR_glom_DF <- speedyseq::psmelt(Ps_obj_filt_GMPR_glom)
Ps_obj_filt_GMPR_glom_DF$Phylum %<>% as.character()
# Ps_obj_filt3_glom_DF %<>% mutate(Species = fct_relevel(Species, "NA", after = Inf))
# group dataframe by Phylum, calculate sum rel. abundance
Ps_obj_filt_GMPR_glom_DF %>%
group_by(Phylum) %>%
summarise(Sum = sum(Abundance) / nsamples(Ps_obj_filt_GMPR_glom) ) ->
Sums
# find Phyla whose rel. abund. is less than 5%
Rare_phyla0.05 <- Sums[Sums$Sum <= 0.05, ]$Phylum
# change their name to "Rare"
Ps_obj_filt_GMPR_glom_DF[Ps_obj_filt_GMPR_glom_DF$Phylum %in% Rare_phyla0.05, ]$Phylum <- 'Rare'
# re-group
Ps_obj_filt_GMPR_glom_DF %>%
group_by(Sample, Phylum, Location, Rock.type, Location.rock) %>%
summarise(Abundance = sum(Abundance)) ->
Ps_obj_filt_GMPR_glom_DF_2plot
# ab.taxonomy$Freq <- sqrt(ab.taxonomy$Freq)
# Ps_obj_filt3_glom_rel_DF$Phylum %<>% sub("unclassified", "Unclassified", .)
# Ps_obj_filt3_glom_rel_DF$Phylum %<>% sub("uncultured", "Unclassified", .)
Ps_obj_filt_GMPR_glom_DF_2plot %>%
group_by(Sample) %>%
filter(Phylum == "Rare") %>%
summarise(`Rares (%)` = sum(Abundance)) ->
RaresSummarise taxonomy
# Percentage of reads classified as rare
Rares %>%
kable(., digits = c(2), caption = "Percentage of reads per sample classified as rare:") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F)|
Sample |
Rares (%) |
|---|---|
|
ShivSClk1S12 |
0.01 |
|
ShivSClk2S15 |
0.59 |
|
ShivSClk3S16 |
0.02 |
|
ShivSClk4S17 |
0.01 |
|
ShivSClk5S18 |
0.00 |
|
ShivSLimek1S13 |
0.02 |
|
ShivSLimek2S14 |
0.04 |
|
ShivSLimek3S19 |
0.11 |
|
ShivSLimek4S20 |
0.24 |
|
ShivSLimek5S21 |
0.01 |
|
ShivSLimek6S25 |
0.01 |
|
SlpClk1S3 |
0.24 |
|
SlpClk2S4 |
0.03 |
|
SlpClk3S5 |
0.03 |
|
SlpClk4S6 |
0.01 |
|
SlpClk5S7 |
0.01 |
|
SlpClk6S8 |
0.03 |
|
SlpClk7S9 |
0.01 |
|
SlpClk8S10 |
0.02 |
|
SlpClk9S11 |
0.03 |
|
SlpLime1S1 |
0.01 |
|
SlpLime2S2 |
0.02 |
|
SlpLime3S22 |
0.07 |
|
SlpLime4S23 |
0.01 |
|
SlpLime5S24 |
0.02 |
sample_order <- match(Rares$Sample, row.names(sample_data(Ps_obj_filt_GMPR_glom)))
Rares %<>% arrange(., sample_order)
Rares %>%
cbind(., sample_data(Ps_obj_filt_GMPR_glom)) %>%
group_by(Location.rock) %>%
setNames(make.names(names(.), unique = TRUE)) %>% # fails for some reason without it
summarise(`Rares (%)` = mean(`Rares....`)) ->
Rares_merged
# Percentage of reads classified as rare
Rares_merged %>%
kable(., digits = c(2), caption = "Percentage of reads per sample classified as rare:") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F)|
Location.rock |
Rares (%) |
|---|---|
|
City:Chalk |
0.11 |
|
City:Limestone |
0.07 |
|
Slope:Chalk |
0.02 |
|
Slope:Limestone |
0.06 |
Plot taxonomy box-plot
Ps_obj_filt_GMPR_glom_DF_2plot %>%
group_by(Phylum) %>%
summarise(sum.Taxa = sum(Abundance)) %>%
arrange(desc(sum.Taxa)) -> Taxa_rank
Ps_obj_filt_GMPR_glom_DF_2plot$Phylum %<>%
factor(., levels = Taxa_rank$Phylum) %>%
fct_relevel(., "Rare", after = Inf)
p_taxa_box <-
ggplot(Ps_obj_filt_GMPR_glom_DF_2plot, aes(x = Phylum, y = (Abundance))) +
geom_boxplot(aes(group = interaction(Phylum, Location)), position = position_dodge(width = 0.9), fatten = 1) +
geom_point(
aes(colour = Rock.type),
position = position_jitterdodge(dodge.width = 1),
alpha = 1 / 2,
stroke = 0,
size = 2
) +
scale_colour_manual(values = Gradient.colours, name = "") +
theme_bw()+
# theme_cowplot(font_size = 11, font_family = f_name) +
labs(x = NULL, y = "Relative abundance (%)") +
guides(colour = guide_legend(override.aes = list(size = 5))) +
facet_grid(Location ~ .) +
background_grid(major = "xy",
minor = "none") +
theme(axis.text.x = element_text(
angle = 45,
vjust = 0.9,
hjust = 0.9
),
legend.position = c(.99, .99),
legend.justification = c("right", "top"),
legend.box.just = "top",
legend.margin = margin(0, 3, 3, 3))
print(p_taxa_box)Tag rare phyla (for plotting purposes only)
Ps_obj_filt_GMPR_glom <- tax_glom(Ps_obj_filt_GMPR,
"Phylum",
NArm = TRUE) # glomerate to the phylum level
Ps_obj_filt_GMPR_glom_rel <- transform_sample_counts(Ps_obj_filt_GMPR_glom, function(x) x / sum(x)) # transform to rel. ab.
Ps_obj_filt_GMPR_glom_rel_DF <- speedyseq::psmelt(Ps_obj_filt_GMPR_glom_rel) # generate a df
Ps_obj_filt_GMPR_glom_rel_DF$Phylum %<>% as.character() # factor to char
# group dataframe by Phylum, calculate sum rel. abundance
Ps_obj_filt_GMPR_glom_rel_DF %>%
group_by(Phylum) %>%
summarise(Sum = sum(Abundance) / nsamples(Ps_obj_filt_GMPR_glom) ) ->
Sums
# find Phyla whose mean rel. abund. is less than 0.5%
Rare_phyla0.05 <- Sums[Sums$Sum <= 0.05, ]$Phylum
# change their name to "Rare"
Ps_obj_filt_GMPR_glom_rel_DF[Ps_obj_filt_GMPR_glom_rel_DF$Phylum %in% Rare_phyla0.05, "Phylum"] <- 'Rare'
# re-group
Ps_obj_filt_GMPR_glom_rel_DF %>%
group_by(Phylum) %>%
summarise(Abundance = sum(Abundance)) %>%
arrange(desc(Abundance)) -> Taxa_rankDetect differentially abundant OTUs using corncob (Martin, Witten, and Willis 2020)
comparison_string <- c("City", "Slope")
Ps_obj_filt %>%
subset_samples(Location %in% c(comparison_string[1], comparison_string[2])) %>%
tax_glom("Order") ->
Ps_obj_filt_pairwise_glom
# Test differential abundance for location
da_Loc <- differentialTest(formula = ~ Location,
phi.formula = ~ Location,
formula_null = ~ 1,
phi.formula_null = ~ Location,
test = "Wald", boot = FALSE,
data = Ps_obj_filt,
fdr_cutoff = 0.05,
full_output = TRUE)
da_Loc_intervals <- plot(da_Loc, level = "Class", data_only = T)
which(is.na(da_Loc$p)) %>% names## [1] "OTU398" "OTU164" "OTU476" "OTU641" "OTU239" "OTU481" "OTU727" "OTU245"
Ps_obj_filt %>%
transform_sample_counts(., function(x) x / sum(x) * 100) %>%
taxa_sums(.) %>%
map_dbl(~(.x / nsamples(Ps_obj_filt))) %>%
enframe(name = "OTU", value = "Mean abundance (%)") ->
baseMean
map(da_Loc$all_models,15) %>%
map(.,2) %>%
unlist %>% # grab all mu.LocationSlope Estimates (differences in estimated population relative abundance)
bind_cols(OTU = taxa_names(Ps_obj_filt),
tax_table(Ps_obj_filt),
`Differential abundance` = .,
Significance = fct_recode(as_factor(taxa_names(Ps_obj_filt) %in% da_Loc$significant_taxa), Pass = "TRUE", Fail = "FALSE"),
ymin = as.numeric(NA),
ymax = as.numeric(NA)
) %>%
left_join(., baseMean, by = "OTU") ->
da_Loc_df
da_Loc_df %<>% rows_update(., tibble(ymin = da_Loc_intervals$xmin, OTU = da_Loc$significant_taxa), by = "OTU")
da_Loc_df %<>% rows_update(., tibble(ymax = da_Loc_intervals$xmax, OTU = da_Loc$significant_taxa), by = "OTU")
da_Loc_df[da_Loc_df$Phylum %in% Rare_phyla0.05, "Phylum"] <- 'Rare' # rare_phyla is
p_corncob_loc <- GGPlotCorncob(da_Loc_df, OTU_labels = FALSE, Taxa = "Phylum", Y_val = "Differential abundance", sig_level = 0.05, Rank = Taxa_rank)
corncob_summary <- tibble(Label = c(paste0("⬆", sum(da_Loc_df$`Differential abundance` > 0 & da_Loc_df$Significance == "Pass"), " ⬇", sum(da_Loc_df$`Differential abundance` < 0 & da_Loc_df$Significance == "Pass"), " (", nrow(da_Loc_df), ")")))
p_corncob_loc <- p_corncob_loc +
labs(title = paste(comparison_string, collapse = " - ")) +
coord_cartesian(ylim = c(-15, 15))
print(p_corncob_loc)Modelling differential abundance and variance between locations discovered length(da_Loc$significant_taxa)
comparison_string <- c("Limestone", "Chalk")
# Test differential abundance and variance for rock type
da_Rock <- differentialTest(formula = ~ Rock.type,
phi.formula = ~ Rock.type,
formula_null = ~ 1,
phi.formula_null = ~ Rock.type,
test = "Wald", boot = FALSE,
data = Ps_obj_filt,
fdr_cutoff = 0.05,
full_output = TRUE)
da_Rock_intervals <- plot(da_Rock, level = "Class", data_only = TRUE)
which(is.na(da_Rock$p)) %>% names## [1] "OTU780" "OTU164" "OTU580" "OTU679" "OTU696" "OTU712" "OTU519" "OTU503" "OTU542"
## [10] "OTU245" "OTU229"
map(da_Rock$all_models,15) %>%
map(.,2) %>%
unlist %>% # grab all mu.LocationSlope Estimates (differences in estimated population relative abundance)
bind_cols(OTU = taxa_names(Ps_obj_filt),
tax_table(Ps_obj_filt),
`Differential abundance` = .,
Significance = fct_recode(as_factor(taxa_names(Ps_obj_filt) %in% da_Rock$significant_taxa), Pass = "TRUE", Fail = "FALSE"),
ymin = as.numeric(NA),
ymax = as.numeric(NA)
) %>%
left_join(., baseMean, by = "OTU") ->
da_Rock_df
da_Rock_df %<>% rows_update(., tibble(ymin = da_Rock_intervals$xmin, OTU = da_Rock$significant_taxa), by = "OTU")
da_Rock_df %<>% rows_update(., tibble(ymax = da_Rock_intervals$xmax, OTU = da_Rock$significant_taxa), by = "OTU")
da_Rock_df[da_Rock_df$Phylum %in% Rare_phyla0.05, "Phylum"] <- 'Rare' # rare_phyla is
p_corncob_rock <- GGPlotCorncob(da_Rock_df, OTU_labels = FALSE, Taxa = "Phylum", Y_val = "Differential abundance", sig_level = 0.05, Rank = Taxa_rank)
corncob_summary <- tibble(Label = c(paste0("⬆", sum(da_Rock_df$`Differential abundance` > 0 & da_Rock_df$Significance == "Pass"), " ⬇", sum(da_Rock_df$`Differential abundance` < 0 & da_Rock_df$Significance == "Pass"), " (", nrow(da_Rock_df), ")")))
p_corncob_rock <- p_corncob_rock +
labs(title = paste(comparison_string, collapse = " - ")) +
coord_cartesian(ylim = c(-15, 15))
print(p_corncob_rock)Modelling differential abundance and variance between rock types discovered length(da_Rock$significant_taxa)
# Test differential abundance for location, control for Rock.type for both cases
comparison_string <- c("City", "Slope")
da_Loc_exRock <- differentialTest(formula = ~ Location + Rock.type,
phi.formula = ~ Location + Rock.type,
formula_null = ~ Rock.type,
phi.formula_null = ~ Location + Rock.type,
test = "Wald", boot = FALSE,
data = Ps_obj_filt,
fdr_cutoff = 0.05,
full_output = TRUE)
da_Loc_exRock_intervals <- plot(da_Loc_exRock, level = "Class", data_only = TRUE)
which(is.na(da_Loc_exRock$p)) %>% names## [1] "OTU780" "OTU398" "OTU164" "OTU476" "OTU580" "OTU679" "OTU696" "OTU712" "OTU641"
## [10] "OTU519" "OTU239" "OTU481" "OTU503" "OTU542" "OTU727" "OTU245" "OTU229"
map(da_Loc_exRock$all_models, 15) %>%
map(., 2) %>%
unlist %>% # grab all mu.LocationSlope Estimates (differences in estimated population relative abundance)
bind_cols(OTU = taxa_names(Ps_obj_filt),
tax_table(Ps_obj_filt),
`Differential abundance` = .,
Significance = fct_recode(as_factor(taxa_names(Ps_obj_filt) %in% da_Loc_exRock$significant_taxa), Pass = "TRUE", Fail = "FALSE"),
ymin = as.numeric(NA),
ymax = as.numeric(NA)
) %>%
left_join(., baseMean, by = "OTU") ->
da_Loc_exRock_df
da_Loc_exRock_df %<>% rows_update(., tibble(ymin = da_Loc_exRock_intervals$xmin, OTU = da_Loc_exRock$significant_taxa), by = "OTU")
da_Loc_exRock_df %<>% rows_update(., tibble(ymax = da_Loc_exRock_intervals$xmax, OTU = da_Loc_exRock$significant_taxa), by = "OTU")
da_Loc_exRock_df[da_Loc_exRock_df$Phylum %in% Rare_phyla0.05, "Phylum"] <- 'Rare' # rare_phyla is
p_corncob_locExroc <- GGPlotCorncob(da_Loc_exRock_df, OTU_labels = FALSE, Taxa = "Phylum", Y_val = "Differential abundance", sig_level = 0.05, Rank = Taxa_rank)
corncob_summary <- tibble(Label = c(paste0("⬆", sum(da_Loc_exRock_df$`Differential abundance` > 0 & da_Loc_exRock_df$Significance == "Pass"), " ⬇", sum(da_Loc_exRock_df$`Differential abundance` < 0 & da_Loc_exRock_df$Significance == "Pass"), " (", nrow(da_Loc_exRock_df), ")")))
p_corncob_locExroc <- p_corncob_locExroc +
labs(title = paste(comparison_string, collapse = " - ")) +
coord_cartesian(ylim = c(-15, 15))
print(p_corncob_locExroc)Modelling differential abundance between locations, while controlling for rock type discovered length(da_Loc_exRock$significant_taxa)
mod260 <- bbdml(formula = OTU260 ~ 1,
phi.formula = ~ 1,
data = Ps_obj_filt)
mod260_Loc <- bbdml(formula = OTU260 ~ Location,
phi.formula = ~ Location,
data = Ps_obj_filt)
mod260_Loc_rock <- bbdml(formula = OTU97 ~ Location*Rock.type,
phi.formula = ~ Location*Rock.type,
data = Ps_obj_filt)
lrtest(mod_null = mod260, mod = mod260_Loc)## [1] 0.004013869
##
## Call:
## bbdml(formula = OTU260 ~ Location, phi.formula = ~Location, data = Ps_obj_filt)
##
##
## Coefficients associated with abundance:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.8860 0.4137 -19.060 9.8e-15 ***
## LocationCity -1.3161 0.4812 -2.735 0.0124 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## Coefficients associated with dispersion:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.1074 0.5436 -13.075 1.47e-11 ***
## LocationCity -2.7580 0.8110 -3.401 0.00269 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## Log-likelihood: -80.859
plot(mod260_Loc, color = "Location", shape = "Rock.type") # add total = TRUE for total counts (i.e. not relative abundance)composite_plot <- ((p_alpha + p_taxa_box + plot_layout(widths = c(1, 2))) /(p_ord + pSTAMPR1) + plot_annotation(tag_levels = 'A') & theme(plot.tag = element_text(size = f_size)))
plot_file <- "./Results/Microbiome_1"
svglite(paste0(plot_file, ".svg"),
width = 14,
height = 10.5)
print(composite_plot)
dev.off()## pdf
## 2
ggsave(
paste0(plot_file, ".png"),
composite_plot,
device = agg_png,
width = 14,
height = 10.5,
units = "cm",
res = 900,
scaling = 0.38
)
gz(paste0(plot_file, ".svg"), paste0(plot_file, ".svgz"))
knitr::include_graphics(paste0(plot_file, ".png"))plot_file <- "./Results/Microbiome_2"
svglite(paste0(plot_file, ".svg"),
width = 12,
height = 10)
print(p_corncob_locExroc)
dev.off()## pdf
## 2
ggsave(
paste0(plot_file, ".png"),
p_corncob_locExroc,
device = agg_png,
width = 12,
height = 10,
units = "cm",
res = 900,
scaling = 0.38
)
gz(paste0(plot_file, ".svg"), paste0(plot_file, ".svgz"))Chen, Jun, and Li Chen. 2017. “GMPR: A novel normalization method for microbiome sequencing data.” bioRxiv, February, 112565. https://doi.org/10.1101/112565.
Martin, Bryan D., Daniela Witten, and Amy D. Willis. 2020. “Modeling Microbial Abundances and Dysbiosis with Beta-Binomial Regression.” Annals of Applied Statistics 14 (1): 94–115. https://doi.org/10.1214/19-AOAS1283.